# The workspace is deliberately not wiped here. rm(list = ls()) only removes
# objects (it does not detach packages or reset options), and it silently
# destroys the caller's session when this file is sourced. Run the script in a
# fresh R session instead (e.g. Rscript, or "Restart R" in RStudio).
library(readr)
library(ggplot2)
library(GGally)
library(speakr)
library(dplyr)
library(tidyverse)
library(readr)
library(plyr)
library(stringi)
library(purrr)
library(broom)
library(scatterplot3d)
library(tidyr)
library(readr)
library(readxl)
# Analysis of all_data. Tidy all_data: the all_data data frame includes all 58 participants in two groups. Only the total number of correct responses is included for each experimental task (flanker, Stroop, backward).
# Read the working data set from CSV; the first row holds the column names.
all_data <- read.csv(
  file = "/Users/princesa/Desktop/diana/all_data.csv",
  header = TRUE
)
# Per-column descriptive statistics
summary(object = all_data)
## participant age IQ backward_correct
## Length:58 Min. :22.00 Min. : 60.00 Min. :0.000
## Class :character 1st Qu.:32.00 1st Qu.: 96.25 1st Qu.:1.000
## Mode :character Median :38.00 Median :107.00 Median :4.000
## Mean :39.19 Mean :108.09 Mean :3.138
## 3rd Qu.:47.00 3rd Qu.:122.25 3rd Qu.:5.000
## Max. :59.00 Max. :146.00 Max. :6.000
## flanker_correct stroop_correct
## Min. : 0.00 Min. : 0.00
## 1st Qu.:40.50 1st Qu.:38.00
## Median :45.00 Median :39.00
## Mean :41.91 Mean :36.83
## 3rd Qu.:47.00 3rd Qu.:40.00
## Max. :49.00 Max. :40.00
# Separate groups: nonLanguage teachers (rows 1-36) and language teachers
# (rows 37-58). The split is positional, so stop early if the file does not
# have the expected 58 rows rather than silently building wrong or NA-padded
# groups.
stopifnot(nrow(all_data) == 58L)
nonLanguage <- all_data[1:36, ]
language <- all_data[37:58, ]
# Add a column that records each participant's group.
nonLanguage$language <- "NonLanguage"
language$language <- "Language"
# Rebuild all_data from the two labelled groups so it carries the group column.
all_data <- bind_rows(nonLanguage, language)
## Data visualization of all participants with ggplot
# Scatterplot of IQ against age for every row of all_data.
ggplot(data = all_data, mapping = aes(x = age, y = IQ)) +
  geom_point(size = 1) +
  ggtitle(label = "Language Plot")
# Layers the author left disabled:
#   geom_jitter(width = .2)
#   geom_smooth(method = "")
#   facet_grid(language ~ backward_correct)
# Data Analysis for nonLanguage Group {.tabset}
# The nonLanguage group consists of the teachers who use only one language in their professional work setting.
# Preview the first rows of the nonLanguage group. The former data(nonLanguage)
# call was dropped: data() only loads data sets shipped with packages, so for
# this in-memory data frame it did nothing except emit a "not found" warning.
head(nonLanguage)
## participant age IQ backward_correct flanker_correct stroop_correct
## 1 adnm2 36 135 5 45 39
## 2 bjfc1 40 108 4 44 38
## 3 buzt0 30 146 5 49 39
## 4 bwat2 34 128 1 46 39
## 5 cbky9 49 135 4 48 40
## 6 cfug6 34 63 0 27 2
## language
## 1 NonLanguage
## 2 NonLanguage
## 3 NonLanguage
## 4 NonLanguage
## 5 NonLanguage
## 6 NonLanguage
# Backward task, nonLanguage group: scatterplots of the score against each
# predictor (age, then IQ), drawn with solid points.
plot(backward_correct ~ age + IQ, pch = 16, data = nonLanguage)
# Multiple linear regression of the backward score on age and IQ.
backward_model_nonLanguage <- lm(
  formula = backward_correct ~ age + IQ,
  data = nonLanguage
)
# abline(backward_model_nonLanguage)
# Coefficient table as a tibble.
my_results <- broom::tidy(backward_model_nonLanguage)
my_results
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.194 2.35 -0.0828 0.934
## 2 age -0.00622 0.0366 -0.170 0.866
## 3 IQ 0.0316 0.0154 2.05 0.0482
# Compact print of the fitted model (call and coefficients).
backward_model_nonLanguage
##
## Call:
## lm(formula = backward_correct ~ age + IQ, data = nonLanguage)
##
## Coefficients:
## (Intercept) age IQ
## -0.194378 -0.006223 0.031591
# Full regression summary.
summary(object = backward_model_nonLanguage)
##
## Call:
## lm(formula = backward_correct ~ age + IQ, data = nonLanguage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3658 -2.0974 0.6713 1.3846 2.6145
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.194378 2.346418 -0.083 0.9345
## age -0.006223 0.036562 -0.170 0.8659
## IQ 0.031591 0.015400 2.051 0.0482 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.934 on 33 degrees of freedom
## Multiple R-squared: 0.1174, Adjusted R-squared: 0.06393
## F-statistic: 2.195 on 2 and 33 DF, p-value: 0.1273
# Diagnostic plots for the fitted model, arranged on a 2 x 2 page.
par(mfrow = rep(2, 2))
plot(x = backward_model_nonLanguage)
# Information criteria for this fit: AIC, then BIC.
AIC(object = backward_model_nonLanguage)
## [1] 154.5309
BIC(object = backward_model_nonLanguage)
## [1] 160.865
# Preview the first rows of the nonLanguage group. The former data(nonLanguage)
# call was dropped: data() only loads data sets shipped with packages, so for
# this in-memory data frame it did nothing except emit a "not found" warning.
head(nonLanguage)
## participant age IQ backward_correct flanker_correct stroop_correct
## 1 adnm2 36 135 5 45 39
## 2 bjfc1 40 108 4 44 38
## 3 buzt0 30 146 5 49 39
## 4 bwat2 34 128 1 46 39
## 5 cbky9 49 135 4 48 40
## 6 cfug6 34 63 0 27 2
## language
## 1 NonLanguage
## 2 NonLanguage
## 3 NonLanguage
## 4 NonLanguage
## 5 NonLanguage
## 6 NonLanguage
# Flanker task, nonLanguage group: scatterplots of the score against each
# predictor (age, then IQ), drawn with solid points.
plot(flanker_correct ~ age + IQ, pch = 16, data = nonLanguage)
# Multiple linear regression of the flanker score on age and IQ.
flanker_model_nonLanguage <- lm(
  formula = flanker_correct ~ age + IQ,
  data = nonLanguage
)
# abline(flanker_model_nonLanguage)
# Coefficient table as a tibble.
my_results <- broom::tidy(flanker_model_nonLanguage)
my_results
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 28.1 6.91 4.07 0.000278
## 2 age 0.0766 0.108 0.711 0.482
## 3 IQ 0.104 0.0454 2.30 0.0282
# Compact print of the fitted model (call and coefficients).
flanker_model_nonLanguage
##
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = nonLanguage)
##
## Coefficients:
## (Intercept) age IQ
## 28.11861 0.07663 0.10418
# Full regression summary.
summary(object = flanker_model_nonLanguage)
##
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = nonLanguage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.9680 -0.4283 1.4999 3.2503 8.2520
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.11861 6.91314 4.067 0.000278 ***
## age 0.07663 0.10772 0.711 0.481859
## IQ 0.10418 0.04537 2.296 0.028156 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.699 on 33 degrees of freedom
## Multiple R-squared: 0.1419, Adjusted R-squared: 0.08992
## F-statistic: 2.729 on 2 and 33 DF, p-value: 0.08002
# Diagnostic plots for the fitted model, arranged on a 2 x 2 page.
par(mfrow = rep(2, 2))
plot(x = flanker_model_nonLanguage)
# Information criteria for this fit: AIC, then BIC.
AIC(object = flanker_model_nonLanguage)
## [1] 232.3294
BIC(object = flanker_model_nonLanguage)
## [1] 238.6635
# Preview the first rows of the nonLanguage group. The former data(nonLanguage)
# call was dropped: data() only loads data sets shipped with packages, so for
# this in-memory data frame it did nothing except emit a "not found" warning.
head(nonLanguage)
## participant age IQ backward_correct flanker_correct stroop_correct
## 1 adnm2 36 135 5 45 39
## 2 bjfc1 40 108 4 44 38
## 3 buzt0 30 146 5 49 39
## 4 bwat2 34 128 1 46 39
## 5 cbky9 49 135 4 48 40
## 6 cfug6 34 63 0 27 2
## language
## 1 NonLanguage
## 2 NonLanguage
## 3 NonLanguage
## 4 NonLanguage
## 5 NonLanguage
## 6 NonLanguage
## plot: stroop score against each predictor (age, then IQ), nonLanguage group
plot(stroop_correct ~ age + IQ, data = nonLanguage, pch = 16)
# linear model: stroop score regressed on age and IQ
stroop_model_nonLanguage <- lm(stroop_correct ~ age + IQ, data = nonLanguage)
# No abline() here: abline() draws a single-predictor line, so for this
# two-predictor fit it used only the intercept and the age slope (with a
# warning) and overlaid that line on the last scatterplot drawn (score vs IQ).
# The other nonLanguage sections already have the call disabled.
my_results <- tidy(stroop_model_nonLanguage)
my_results
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 19.4 6.94 2.79 0.00863
## 2 age 0.0714 0.108 0.660 0.514
## 3 IQ 0.138 0.0456 3.03 0.00474
stroop_model_nonLanguage
##
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = nonLanguage)
##
## Coefficients:
## (Intercept) age IQ
## 19.38713 0.07142 0.13802
summary(stroop_model_nonLanguage)
##
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = nonLanguage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.5108 -1.5472 0.5263 2.1878 7.3319
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.38713 6.94219 2.793 0.00863 **
## age 0.07142 0.10817 0.660 0.51370
## IQ 0.13802 0.04556 3.029 0.00474 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.723 on 33 degrees of freedom
## Multiple R-squared: 0.2191, Adjusted R-squared: 0.1718
## F-statistic: 4.63 on 2 and 33 DF, p-value: 0.01689
# Diagnostic plots for the fitted model, arranged on a 2 x 2 page.
par(mfrow = rep(2, 2))
plot(x = stroop_model_nonLanguage)
# Information criteria for this fit: AIC, then BIC.
AIC(object = stroop_model_nonLanguage)
## [1] 232.6312
BIC(object = stroop_model_nonLanguage)
## [1] 238.9653
# The language group consists of the teachers who use more than one language in their professional work setting.
# Preview the first rows of the language group. The former data(language)
# call was dropped: data() only loads data sets shipped with packages, so for
# this in-memory data frame it did nothing except emit a "not found" warning.
head(language)
## participant age IQ backward_correct flanker_correct stroop_correct language
## 37 bi_ajli0 35 94 4 45 38 Language
## 38 bi_dckv1 30 105 4 49 38 Language
## 39 bi_dvyf7 58 105 5 37 39 Language
## 40 bi_evur7 42 93 0 40 34 Language
## 41 bi_giah0 23 102 5 44 39 Language
## 42 bi_igsy3 50 120 6 49 39 Language
# plot Language: backward score against each predictor (age, then IQ)
plot(backward_correct ~ age + IQ, data = language, pch = 16)
# linear model: backward score regressed on age and IQ
backward_model_language <- lm(backward_correct ~ age + IQ, data = language)
# No abline() here: abline() draws a single-predictor line, so for this
# two-predictor fit it used only the intercept and the age slope (with a
# warning) and overlaid that line on the last scatterplot drawn (score vs IQ).
my_results <- tidy(backward_model_language)
my_results
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -7.69 2.80 -2.75 0.0128
## 2 age -0.0609 0.0269 -2.27 0.0352
## 3 IQ 0.129 0.0258 5.00 0.0000795
backward_model_language
##
## Call:
## lm(formula = backward_correct ~ age + IQ, data = language)
##
## Coefficients:
## (Intercept) age IQ
## -7.69311 -0.06092 0.12908
summary(backward_model_language)
##
## Call:
## lm(formula = backward_correct ~ age + IQ, data = language)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.32845 -0.81186 -0.07596 0.90055 2.67241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.69311 2.79829 -2.749 0.0128 *
## age -0.06092 0.02687 -2.267 0.0352 *
## IQ 0.12908 0.02582 5.000 7.95e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.375 on 19 degrees of freedom
## Multiple R-squared: 0.5937, Adjusted R-squared: 0.5509
## F-statistic: 13.88 on 2 and 19 DF, p-value: 0.0001923
# Diagnostic plots for the fitted model, arranged on a 2 x 2 page.
par(mfrow = rep(2, 2))
plot(x = backward_model_language)
# Information criteria for this fit: AIC, then BIC.
AIC(object = backward_model_language)
## [1] 81.20737
BIC(object = backward_model_language)
## [1] 85.57154
# Preview the first rows of the language group. The former data(language)
# call was dropped: data() only loads data sets shipped with packages, so for
# this in-memory data frame it did nothing except emit a "not found" warning.
head(language)
## participant age IQ backward_correct flanker_correct stroop_correct language
## 37 bi_ajli0 35 94 4 45 38 Language
## 38 bi_dckv1 30 105 4 49 38 Language
## 39 bi_dvyf7 58 105 5 37 39 Language
## 40 bi_evur7 42 93 0 40 34 Language
## 41 bi_giah0 23 102 5 44 39 Language
## 42 bi_igsy3 50 120 6 49 39 Language
# plot Language: flanker score against each predictor (age, then IQ)
plot(flanker_correct ~ age + IQ, data = language, pch = 16)
# linear model: flanker score regressed on age and IQ
flanker_model_language <- lm(flanker_correct ~ age + IQ, data = language)
# No abline() here: abline() draws a single-predictor line, so for this
# two-predictor fit it used only the intercept and the age slope (with a
# warning) and overlaid that line on the last scatterplot drawn (score vs IQ).
my_results <- tidy(flanker_model_language)
my_results
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 12.1 20.1 0.605 0.552
## 2 age -0.425 0.193 -2.21 0.0399
## 3 IQ 0.443 0.185 2.39 0.0271
flanker_model_language
##
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = language)
##
## Coefficients:
## (Intercept) age IQ
## 12.1411 -0.4251 0.4432
summary(flanker_model_language)
##
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = language)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.278 -1.917 2.854 4.847 11.242
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.1411 20.0687 0.605 0.5523
## age -0.4251 0.1927 -2.207 0.0399 *
## IQ 0.4432 0.1851 2.394 0.0271 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.858 on 19 degrees of freedom
## Multiple R-squared: 0.3319, Adjusted R-squared: 0.2616
## F-statistic: 4.719 on 2 and 19 DF, p-value: 0.02168
# Diagnostic plots for the fitted model, arranged on a 2 x 2 page.
par(mfrow = rep(2, 2))
plot(x = flanker_model_language)
# Information criteria for this fit: AIC, then BIC.
AIC(object = flanker_model_language)
## [1] 167.8941
BIC(object = flanker_model_language)
## [1] 172.2582
# Preview the first rows of the language group. The former data(language)
# call was dropped: data() only loads data sets shipped with packages, so for
# this in-memory data frame it did nothing except emit a "not found" warning.
head(language)
## participant age IQ backward_correct flanker_correct stroop_correct language
## 37 bi_ajli0 35 94 4 45 38 Language
## 38 bi_dckv1 30 105 4 49 38 Language
## 39 bi_dvyf7 58 105 5 37 39 Language
## 40 bi_evur7 42 93 0 40 34 Language
## 41 bi_giah0 23 102 5 44 39 Language
## 42 bi_igsy3 50 120 6 49 39 Language
# Stroop task, language group: scatterplots of the score against each
# predictor (age, then IQ) on a 2 x 2 page, drawn with solid points.
par(mfrow = rep(2, 2))
plot(stroop_correct ~ age + IQ, pch = 16, data = language)
# Multiple linear regression of the stroop score on age and IQ.
stroop_model_language <- lm(
  formula = stroop_correct ~ age + IQ,
  data = language
)
# Diagnostic plots for the fit, again on a 2 x 2 page.
par(mfrow = rep(2, 2))
plot(x = stroop_model_language)
# Coefficient table as a tibble.
my_results <- broom::tidy(stroop_model_language)
my_results
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 13.6 14.9 0.914 0.372
## 2 age -0.313 0.143 -2.18 0.0417
## 3 IQ 0.338 0.138 2.45 0.0240
# Compact print of the fitted model (call and coefficients).
stroop_model_language
##
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = language)
##
## Coefficients:
## (Intercept) age IQ
## 13.6334 -0.3127 0.3376
# Full regression summary.
summary(object = stroop_model_language)
##
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = language)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.5774 -1.6728 0.3679 2.9557 11.9590
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.6334 14.9160 0.914 0.3722
## age -0.3127 0.1432 -2.184 0.0417 *
## IQ 0.3376 0.1376 2.453 0.0240 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.327 on 19 degrees of freedom
## Multiple R-squared: 0.3359, Adjusted R-squared: 0.266
## F-statistic: 4.805 on 2 and 19 DF, p-value: 0.02048
# Diagnostic plots for the fitted model, arranged on a 2 x 2 page.
par(mfrow = rep(2, 2))
plot(x = stroop_model_language)
# Information criteria for this fit: AIC, then BIC.
AIC(object = stroop_model_language)
## [1] 154.8383
BIC(object = stroop_model_language)
## [1] 159.2025
# ADDITIONAL ANALYSIS
# Histograms of the three task scores across all participants. Columns are
# reached through with() rather than attach(): attach() left a copy of all_data
# on the search path, and its `language` column was masked by the `language`
# data frame in the global environment.
with(all_data, hist(backward_correct)) # this could be considered "normal"
with(all_data, hist(flanker_correct)) # not normal
with(all_data, hist(stroop_correct)) # not normal
summary(all_data)
## participant age IQ backward_correct
## Length:58 Min. :22.00 Min. : 60.00 Min. :0.000
## Class :character 1st Qu.:32.00 1st Qu.: 96.25 1st Qu.:1.000
## Mode :character Median :38.00 Median :107.00 Median :4.000
## Mean :39.19 Mean :108.09 Mean :3.138
## 3rd Qu.:47.00 3rd Qu.:122.25 3rd Qu.:5.000
## Max. :59.00 Max. :146.00 Max. :6.000
## flanker_correct stroop_correct language
## Min. : 0.00 Min. : 0.00 Length:58
## 1st Qu.:40.50 1st Qu.:38.00 Class :character
## Median :45.00 Median :39.00 Mode :character
## Mean :41.91 Mean :36.83
## 3rd Qu.:47.00 3rd Qu.:40.00
## Max. :49.00 Max. :40.00
# An attempt to normalize (z-score) the task scores to see whether it would make a difference.
# Standardize each task score with scale(). Columns are taken from all_data
# explicitly instead of through a second attach(), which stacked another copy
# of the data frame on the search path and produced masking messages.
scaledflank <- scale(all_data$flanker_correct)
scaledstroop <- scale(all_data$stroop_correct)
scaledback <- scale(all_data$backward_correct)
# TRUE for participants whose ID starts with "bi_".
lang <- startsWith(all_data$participant, "bi_")
biling <- rep("false", length(lang)) # start with every participant "false"
biling[lang] <- "true" # then flag the "bi_" participants
# Working data frame: predictors, scaled scores and the "bi_" flag.
data <- data.frame(
  age = all_data$age,
  IQ = all_data$IQ,
  scaledflank, scaledstroop, scaledback, biling
)
# colnames(data) <- c("age", "IQ", "scaledflank", "scaledstroop", "scaledback", "biling") # not needed but may use later
# Histograms of the scaled scores.
hist(scaledback)
hist(scaledstroop)
hist(scaledflank)
#——————————————## TESTING Additional plots
# IQ against backward score for all participants, points shaded by age.
ggplot(data = all_data) +
  geom_point(mapping = aes(x = backward_correct, y = IQ, color = age))
# 3-D scatter of age, IQ and backward score. The columns are selected by name:
# the former all_data[1:4] started with the character `participant` column,
# and when given a data frame the plot takes its first three columns as x, y
# and z, so participant IDs (coerced to integer codes) ended up on the x axis.
scatterplot3d(
  all_data[c("age", "IQ", "backward_correct")],
  pch = 19, color = "steelblue",
  grid = TRUE, box = FALSE,
  mar = c(3, 3, 0.5, 3)
)
# nonLanguage scatterplot: IQ against backward score, large outlined markers.
# NOTE(review): the title reads "LanguagePlot" although the data are the
# nonLanguage group; confirm whether that is intended.
ggplot(data = nonLanguage, mapping = aes(x = backward_correct, y = IQ)) +
  geom_point(
    shape = 21, size = 5,
    color = "black", fill = "dark green"
  ) +
  ggtitle(label = "LanguagePlot")
#————————————————————————#
## Removing row 45
# Sensitivity check: refit the language-group flanker and stroop models with
# row 45 of all_data left out (presumably an outlying participant; confirm).
new_all_data <- all_data[-45, ]
# Language group without row 45, selected by group label rather than by
# position. new_all_data has only 57 rows, so the former new_all_data[37:58, ]
# appended an all-NA row that lm() then reported as an observation deleted due
# to missingness. The fitted values are unaffected by this fix.
new_language <- new_all_data[new_all_data$language == "Language", ]
# plot Language: flanker score against each predictor (age, then IQ)
plot(flanker_correct ~ age + IQ, data = new_language, pch = 16)
# linear model (note: this overwrites the full-sample flanker_model_language)
flanker_model_language <- lm(flanker_correct ~ age + IQ, data = new_language)
# No abline() here: abline() draws a single-predictor line, so for this
# two-predictor fit it used only the intercept and the age slope (with a
# warning) and overlaid that line on the last scatterplot drawn (score vs IQ).
my_results <- tidy(flanker_model_language)
my_results
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 21.2 13.8 1.54 0.141
## 2 age -0.155 0.142 -1.09 0.290
## 3 IQ 0.266 0.131 2.03 0.0573
flanker_model_language
##
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = new_language)
##
## Coefficients:
## (Intercept) age IQ
## 21.2068 -0.1552 0.2660
summary(flanker_model_language)
##
## Call:
## lm(formula = flanker_correct ~ age + IQ, data = new_language)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.7273 -0.7667 2.4163 3.6353 6.0239
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.2068 13.7539 1.542 0.1405
## age -0.1552 0.1423 -1.090 0.2899
## IQ 0.2660 0.1310 2.031 0.0573 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.693 on 18 degrees of freedom
## Multiple R-squared: 0.2021, Adjusted R-squared: 0.1135
## F-statistic: 2.28 on 2 and 18 DF, p-value: 0.131
# Diagnostic plots for the refitted flanker model on a 2 x 2 page.
par(mfrow = c(2, 2))
plot(flanker_model_language)
##
# plot Language: stroop score against each predictor (age, then IQ)
par(mfrow = c(2, 2))
plot(stroop_correct ~ age + IQ, data = new_language, pch = 16)
# linear model (note: this overwrites the full-sample stroop_model_language)
stroop_model_language <- lm(stroop_correct ~ age + IQ, data = new_language)
# Diagnostic plots for the refitted stroop model on a 2 x 2 page.
par(mfrow = c(2, 2))
plot(stroop_model_language)
my_results <- tidy(stroop_model_language)
my_results
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 22.2 4.87 4.55 0.000250
## 2 age -0.0590 0.0504 -1.17 0.257
## 3 IQ 0.171 0.0464 3.69 0.00169
stroop_model_language
##
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = new_language)
##
## Coefficients:
## (Intercept) age IQ
## 22.15387 -0.05899 0.17098
summary(stroop_model_language)
##
## Call:
## lm(formula = stroop_correct ~ age + IQ, data = new_language)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4736 -1.1456 -0.2982 1.4977 4.4921
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.15387 4.87196 4.547 0.00025 ***
## age -0.05899 0.05041 -1.170 0.25715
## IQ 0.17098 0.04639 3.686 0.00169 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.371 on 18 degrees of freedom
## Multiple R-squared: 0.4325, Adjusted R-squared: 0.3695
## F-statistic: 6.86 on 2 and 18 DF, p-value: 0.006101